# Setup: load the libraries and helper functions defined in RSS_functions.R.
# chdir = TRUE makes source() evaluate the script from its own directory.
source("RSS_functions.R", chdir = TRUE)
RSS Feeds
R
Javascript
tidyRSS
There are more research papers being published than we could ever consume. This RSS reader gathers the arXiv papers published today and lets us search through them by keyword.
Inspired by an article from InfoWorld (Machlis 2022). Future feeds will bring research papers from more platforms into a database, allowing for a more thorough historical search.
Content of RSS_functions.R script
# Build the project-relative path to the helper script
pathway <- here::here("posts", "RSS_Reader", "RSS_functions.R")
# Read the script and echo it, one source line per output line
lines <- readLines(con = pathway, warn = FALSE)
cat(paste(lines, collapse = "\n"))
# Get the most recent papers function
# Required Libraries
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(dplyr))
library(DT)
library(tidyRSS)
#' Fetch an RSS feed and build a display-ready table of its items.
#'
#' Unguarded variant: any error raised while fetching or parsing the feed
#' propagates to the caller.
#'
#' @param source URL of the RSS feed, passed to `tidyfeed()`.
#' @return A data frame with columns `item_title` (an HTML link to the item),
#'   `feed_pub_date` and `item_description`.
most_recent_test <- function(source) {
  # Feed text is external input. Escape it before placing it inside HTML so
  # that a title containing a quote, `<` or `&` cannot break the
  # single-quoted attributes of the generated link.
  escape_html <- function(x) {
    x <- gsub("&", "&amp;", x, fixed = TRUE)
    x <- gsub("<", "&lt;", x, fixed = TRUE)
    x <- gsub(">", "&gt;", x, fixed = TRUE)
    x <- gsub("\"", "&quot;", x, fixed = TRUE)
    gsub("'", "&#39;", x, fixed = TRUE)
  }

  tidyfeed(source) |>
    select(item_title, feed_pub_date, item_link, item_description) |>
    mutate(
      # Turn the title into a link that opens the item in a new tab.
      item_title = str_glue(
        "<a target='_blank' title='{title}' href='{link}' ",
        "rel='noopener'>{title}</a>",
        title = escape_html(item_title),
        link = escape_html(item_link)
      )
    ) |>
    # The link now lives inside item_title, so the raw column is dropped.
    select(-item_link)
}
#' Fetch an RSS feed and build a display-ready table of its items.
#'
#' @param source URL of the RSS feed, passed to `tidyfeed()`.
#' @return A data frame with columns `item_title` (an HTML link to the item),
#'   `feed_pub_date` and `item_description`, ordered by `feed_pub_date`
#'   descending. Returns `NULL`, after emitting a message, when the feed
#'   cannot be fetched or parsed.
most_recent <- function(source) {
  # Feed text is external input. Escape it before placing it inside HTML so
  # that a title containing a quote, `<` or `&` cannot break the
  # single-quoted attributes of the generated link.
  escape_html <- function(x) {
    x <- gsub("&", "&amp;", x, fixed = TRUE)
    x <- gsub("<", "&lt;", x, fixed = TRUE)
    x <- gsub(">", "&gt;", x, fixed = TRUE)
    x <- gsub("\"", "&quot;", x, fixed = TRUE)
    gsub("'", "&#39;", x, fixed = TRUE)
  }

  tryCatch(
    {
      tidyfeed(source) |>
        select(item_title, feed_pub_date, item_link, item_description) |>
        mutate(
          # Turn the title into a link that opens the item in a new tab.
          item_title = str_glue(
            "<a target='_blank' title='{title}' href='{link}' ",
            "rel='noopener'>{title}</a>",
            title = escape_html(item_title),
            link = escape_html(item_link)
          )
        ) |>
        # All records are returned; the single-row slice is disabled.
        arrange(desc(feed_pub_date)) |>
        # slice(1) |>
        select(-item_link)
    },
    error = function(e) {
      message("Error fetching or parsing feed: ", conditionMessage(e))
      # Explicit sentinel: callers test the result with is.null().
      NULL
    }
  )
}
#my_rss_feed_table
Getting the feed for arXiv and checking the number of results. There are no papers on weekends.
flag <- 0 # Set error flag to FALSE
# Call functions in RSS_functions.R
my_feed <- "https://rss.arxiv.org/rss/cs.LG"
recent_records <- most_recent(my_feed)
GET request successful. Parsing...
#paste("recent records: ", recent_records)
#print (result_output)
# Set flag = 1 when no records were retrieved: the fetch failed (result is
# NULL or NA rather than a data frame) or the feed came back with zero rows.
if (!is.data.frame(recent_records) || nrow(recent_records) == 0) {
  flag <- 1
  cat("Sorry, no papers published today\n")
} else {
  cat("Records Retrieved:", nrow(recent_records), "\n")
  flag <- 0
}
Records Retrieved: 911
cat("flag:", flag, "\n")
flag: 0
Creating output using DT library
# Render the searchable table only when records were retrieved.
if (!flag) {
  DT::datatable(
    recent_records,
    filter = "top",
    # NOTE(review): escape = FALSE is required so the title links render, but
    # it also renders any HTML in item_description, which comes from the feed.
    escape = FALSE,
    rownames = FALSE,
    options = list(
      search = list(regex = TRUE, caseInsensitive = TRUE),
      pageLength = 10,
      lengthMenu = c(10, 25, 50, 100, 200),
      # DataTables option names are case-sensitive: the option is `autoWidth`,
      # and it must be enabled for the columnDefs width to take effect.
      autoWidth = TRUE,
      # Column indices are 0-based here because rownames = FALSE.
      columnDefs = list(list(width = "80%", targets = list(2)))
    )
  )
}
Warning in instance$preRenderHook(instance): It seems your data is too big for
client-side DataTables. You may consider server-side processing:
https://rstudio.github.io/DT/server.html
References
Machlis, Sharon. 2022. “How to Create Your Own RSS Reader with R.” InfoWorld, December. https://www.infoworld.com/article/2337738/how-to-create-your-own-rss-reader-with-r.html.